#!/usr/bin/python
# -*- coding:UTF8 -*-
import os
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree

# Scrape the listing pages at `file_url` (TypeId 10009, pages 1-3) and download
# every linked document into `output_dir`, one file per link title.

# Single source of truth for the download directory: the directory that is
# created and the directory that is written to must be the same path, otherwise
# open() below fails with FileNotFoundError.
output_dir = './测试数据/财务资料'
os.makedirs(output_dir, exist_ok=True)

file_url = 'https://www.jkl.com.cn/newsList.aspx'
user_agent = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36'}
# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the script forever.
request_timeout = 30

# Step 1: collect {title: absolute URL} from every listing page.
# If two links share the same title, the later URL replaces the earlier one.
documents = {}
for current in range(1, 4):
    params = {
        "current": current,
        "TypeId": 10009
    }
    listing = requests.get(url=file_url, headers=user_agent, params=params,
                           timeout=request_timeout)
    listing.raise_for_status()  # do not try to parse an HTTP error page
    page = etree.HTML(listing.text)

    # Read the title and the href from the SAME <a> element so that a link
    # without text (or with several text nodes) cannot shift the pairing.
    for anchor in page.xpath('//div[@class="newsLis"]//li//a[@href]'):
        title = ''.join(anchor.xpath('text()')).strip()
        if not title:
            continue  # nothing usable as a file name
        # urljoin handles site-relative, page-relative and absolute hrefs.
        documents[title] = urljoin(file_url, anchor.get('href'))

# Step 2: download each document.
for title, url in documents.items():
    # Take the extension from the URL path only (ignores any query string);
    # splitext keeps the leading dot and returns '' when there is none.
    extension = os.path.splitext(urlparse(url).path)[1]
    # The title comes from a web page: strip path separators so it can only
    # ever name a file directly inside output_dir.
    safe_title = title.replace('/', '_').replace('\\', '_')
    file_path = os.path.join(output_dir, safe_title + extension)

    try:
        download = requests.get(url=url, headers=user_agent,
                                timeout=request_timeout)
        download.raise_for_status()
    except requests.RequestException as exc:
        # Report and move on so one broken link does not abort the batch,
        # and so an error page is never saved as if it were the document.
        print(title, '下载失败：', exc)
        continue

    # Write the raw bytes to disk.
    with open(file_path, mode='wb') as file:
        file.write(download.content)
    print(title, '下载成功！！！')
